# coding:utf8
import findspark
findspark.init()
from pyspark import SparkConf,SparkContext
if __name__ == '__main__':
    # Local demo job: sum the values for each key in a small pair RDD.
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    sc = SparkContext(conf=conf)
    try:
        rdd = sc.parallelize([('a', 1), ('a', 1), ('b', 1), ('b', 1), ('b', 1)])
        # reduceByKey combines values map-side before the shuffle, so only one
        # partial sum per key per partition crosses the network — unlike the
        # groupByKey().map(sum) pattern, which ships every individual value.
        counts = rdd.reduceByKey(lambda a, b: a + b)
        print(counts.collect())
    finally:
        # Always release the driver/JVM resources, even if the job fails.
        sc.stop()
